#!/bin/sh
#
# Script to rotate procmail.log (appends it, gzip-compressed, to
# procmail.log.old.gz). New data is converted to SQL INSERT statements and
# inserted into a MySQL table.
#
# Note that this script requires GNU `date', `awk', and `sed'. It also needs
# the MySQL command-line client, `mysql'. It may need to run under bash,
# rather than Bourne shell, too; this has not been tested.
#
# The spam log table has two fields, "stamp" and "type". "stamp" is the date
# and time of the email, and "type" is the type of email:
#
#   SPAM              Message was flagged as spam and a bounce sent by JMBA.
#   SPAM-BOUNCE       A bounce sent by JMBA was returned as undeliverable.
#   SPAM-REPLY        A bounce sent by JMBA was replied to.
#   DELIVERED         The email was delivered normally.
#   RETRAIN-SPAM      User retraining QSF - message was really spam.
#   RETRAIN-NONSPAM   User retraining QSF - message was really non-spam.
#
# The procmail.log file is expected in $LOGDIR (defined below), and it
# should be generated by adding this to the top of your .procmailrc file:
#
#   LOGFILE=$HOME/procmail.log
#   LOGABSTRACT=yes
#
# (replace $HOME/ with whatever you set $LOGDIR to below).
#
# For this script to be able to tell what options you called JMBA and QSF
# with, be sure to put the action options first (i.e. "jmba -b ..." instead
# of "jmba ... -b", "jmba -s ..." not "jmba ... -s", and "qsf -M ..." rather
# than "qsf -d foo -g bar -g baz -a -M").
#
# You will need a table in database $MYSQLDB called $MYSQLTABLE (default is
# "spamlog"). The following schema will do:
#
#   CREATE TABLE `spamlog` (
#     `stamp` TIMESTAMP(14) NOT NULL,
#     `type`  ENUM('SPAM','SPAM-BOUNCE','SPAM-REPLY','DELIVERED',
#                  'RETRAIN-SPAM','RETRAIN-NONSPAM')
#             NOT NULL DEFAULT 'DELIVERED',
#     KEY (`stamp`),
#     KEY (`type`)
#   );
#
# If you run this script regularly (say, once a day) from cron, you can then
# do SQL queries on that table to get a picture of your spam blocking.
#
# Fill in the MYSQL* and LOGDIR variables below to make this script work.
#
# Copyright 2007 Andrew Wood, distributed under the Artistic License.
#

MYSQLUSER=
MYSQLPASS=
MYSQLDB=
MYSQLTABLE=spamlog

LOGDIR=$HOME

# Remove this next line to enable this script!
echo "Please read this script first, copy it, THEN run it."; exit 1

cd $LOGDIR || exit 1

[ -s procmail.log ] || exit 0

rm -f procmail.log.old procmail.log.new
touch procmail.log.new || exit 1
chmod 600 procmail.log.new || exit 1
ln procmail.log procmail.log.old || exit 1
mv -f procmail.log.new procmail.log || exit 1

gzip -9 < procmail.log.old >> procmail.log.old.gz

cat procmail.log.old \
| sed -n \
      -e 's/^From [^ 	]\+  \([0-9A-Za-z_ :-]*\).*$/ \1/p'\
      -e 's/^  Folder: \(.*\)[	 ][	 ]\+.*$/\1/p' \
| sed 's/[	 ]*$//' \
| awk '/^ /{cmd="date -d \"" $0 "\" +%Y-%m-%d_%H:%M:%S";cmd | getline d; close(cmd);}
       /^[^ ]/{print d " " $0}' \
| awk '/jmba -b/{print $1 " SPAM-BOUNCE";next}
       /jmba -v -b/{print $1 " SPAM-BOUNCE";next}
       /jmba -s/{print $1 " SPAM-REPLY";next}
       /jmba -v -s/{print $1 " SPAM-REPLY";next}
       /sendmail/{print $1 " SPAM";next}
       /\/dev\/null/{next}
       /qsf(-mysql)? .* *(-[^ -]*m|--mark-spam)/{print $1 " RETRAIN-SPAM";next}
       /qsf(-mysql)? .* *(-[^ -]*M|--mark-nonspam)/{print $1 " RETRAIN-NONSPAM";next}
       {print $1 " DELIVERED"}' \
| sed -e 's/ /","/' \
      -e 's/_/ /' \
      -e 's/^/INSERT INTO '"$MYSQLTABLE"' (`stamp`,`type`) VALUES ("/' \
      -e 's/$/");/' \
| mysql -s -u $MYSQLUSER -p$MYSQLPASS $MYSQLDB

rm -f procmail.log.old

# EOF
